import requests
# import parser
import re
from lxml import etree

# Listing page whose HTML is scanned for image links.
url = 'https://www.kanxiaojiejie.com/page/1'

# Seconds to wait on any single HTTP request. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

resp = requests.get(url, timeout=REQUEST_TIMEOUT)

# Raw HTML source of the listing page.
data_html = resp.text

# Data extraction. The page embeds image tags of the form:
#   src="https://t1.ledchuzu.com/2022/06/22/<hash>.jpg" class="... wp-post-image" ...
# The dots in the host name and in the ".jpg" suffix are escaped so they match
# a literal "." rather than any character.
obj = re.compile(r'https://t1\.ledchuzu\.com/.*?/.*?/.*?\.jpg')

# dict.fromkeys() drops repeated URLs while keeping first-seen order, so the
# same image is not downloaded (and overwritten) more than once.
srcs = list(dict.fromkeys(obj.findall(data_html)))

for src in srcs:
    # The last path segment of the URL is used as the local file name.
    name = src.split('/')[-1]

    try:
        img_resp = requests.get(src, timeout=REQUEST_TIMEOUT)
        # Treat 4xx/5xx as failures so an error page is never saved as a .jpg.
        img_resp.raise_for_status()
    except requests.RequestException as exc:
        # One bad image should not abort the remaining downloads.
        print(f'{name}下载失败：{exc}')
        continue

    # Image data is bytes, so the file must be opened in binary mode;
    # text mode ('w') raises TypeError on f.write(bytes).
    with open(name, mode='wb') as f:
        f.write(img_resp.content)
    print(f'{name}爬取完毕！！')